In [1]:
import pandas as pd

# Load the semicolon-separated git log export into a DataFrame.
df = pd.read_csv("../../../linux/git_demo_timestamp.csv", sep=";")

# Count rows per author and show the ten most frequent ones.
author_counts = df["author"].value_counts()
author_counts.head(10)


Out[1]:
Linus Torvalds           24259
David S. Miller           9563
Mark Brown                6917
Takashi Iwai              6293
Al Viro                   6064
H Hartley Sweeten         5942
Ingo Molnar               5462
Mauro Carvalho Chehab     5384
Arnd Bergmann             5305
Greg Kroah-Hartman        4687
Name: author, dtype: int64

In [2]:
%matplotlib inline
df.author.value_counts().head(10).plot(kind='pie', figsize=(8,8), label="")


Out[2]:
<matplotlib.axes._subplots.AxesSubplot at 0x1fef79256a0>

In [3]:
# Convert the `timestamp_local` column to datetimes so the `.dt` accessor
# can be used on it in later cells.
df["timestamp_local"] = pd.to_datetime(df.timestamp_local)

# Summary of the parsed column (count, unique values, most frequent, range).
df["timestamp_local"].describe()


Out[3]:
count                  723214
unique                 691746
top       2017-11-01 03:56:19
freq                      137
first     2005-04-16 15:20:36
last      2017-12-31 16:52:15
Name: timestamp_local, dtype: object

In [4]:
# Number of rows per calendar year as a bar chart.
# value_counts(sort=False) only skips sorting by frequency; it does not order
# the labels, so sort by the index explicitly to get a chronological x-axis.
df.timestamp_local.dt.year.value_counts(sort=False).sort_index().plot(kind='bar')


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x1fef7a510b8>

In [5]:
# Number of rows per hour of day (local time) as a bar chart.
# value_counts(sort=False) only skips sorting by frequency; it does not order
# the labels, so sort by the index explicitly so the bars run from 0 to 23.
df.timestamp_local.dt.hour.value_counts(sort=False).sort_index().plot(kind='bar')


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x1fef6ea6080>

Alternative: Using original dataset


In [48]:
import pandas as pd

# Header-less, tab-separated export with two fields per row; the names below
# are assigned here because the file itself carries no header line.
# NOTE(review): absolute local path -- consider moving it to a configurable
# data directory so the notebook runs on other machines.
column_names = ['time', 'unix']
df = pd.read_csv("C:/Temp/git_time_unix.csv", sep="\t", encoding='latin-1', names=column_names)

# NOTE(review): `unix` looks like epoch seconds and `time` like a date string
# with a UTC offset; both could be parsed with pd.to_datetime -- confirm
# before relying on it.

# Preview the first rows.
df.head()


Out[48]:
time unix
0 Sun Dec 31 14:47:43 2017 -0800 1514760463
1 Sun Dec 31 13:13:56 2017 -0800 1514754836
2 Sun Dec 31 13:03:05 2017 -0800 1514754185
3 Sun Dec 31 12:30:34 2017 -0800 1514752234
4 Sun Dec 31 12:29:02 2017 -0800 1514752142

In [61]:
# Inspect the `pandas.api` sub-package. The previous cell source (`pd.b`) was a
# truncated attribute name that raises AttributeError on re-run.
pd.api


Out[61]:
<module 'pandas.api' from 'C:\\dev\\apps\\Anaconda3\\lib\\site-packages\\pandas\\api\\__init__.py'>

In [57]:
# Split each `time` string at its last space: everything before it is the
# date/time text, the final token is the UTC offset (e.g. "-0800").
time_parts = df["time"].str.rsplit(" ", n=1, expand=True)

# Store the parsed date/time and the raw offset string as separate columns.
df["timestamp"] = pd.to_datetime(time_parts[0])
df["timezone"] = time_parts[1]

df.head()


Out[57]:
time unix timestamp timezone
0 Sun Dec 31 14:47:43 2017 -0800 1514760463 2017-12-31 14:47:43 -0800
1 Sun Dec 31 13:13:56 2017 -0800 1514754836 2017-12-31 13:13:56 -0800
2 Sun Dec 31 13:03:05 2017 -0800 1514754185 2017-12-31 13:03:05 -0800
3 Sun Dec 31 12:30:34 2017 -0800 1514752234 2017-12-31 12:30:34 -0800
4 Sun Dec 31 12:29:02 2017 -0800 1514752142 2017-12-31 12:29:02 -0800

In [ ]:


In [33]:
# NOTE(review): `timeinfo` is not created by any cell visible in this notebook;
# presumably it came from an earlier version of `df` -- confirm before re-running.
# Take the first space-separated token of each entry and interpret it as
# seconds since the Unix epoch.
first_token = df["timeinfo"].str.split(" ", expand=True)[0]
t = pd.to_datetime(first_token, unit="s")

# How often each resulting timestamp occurs, most frequent first.
t.value_counts()


Out[33]:
2017-10-31 17:56:19    137
2008-09-04 05:30:19     99
2005-09-12 16:49:24     58
2008-11-18 09:48:22     56
2008-07-11 23:27:31     56
2016-02-22 12:13:28     55
2009-01-07 15:14:39     54
2016-07-09 00:41:01     52
2015-06-19 22:00:46     48
2014-09-02 17:25:26     45
2016-02-24 09:14:07     42
2008-01-29 04:58:27     40
2005-06-21 04:15:16     40
2009-01-07 15:14:38     40
2009-02-04 08:49:45     39
2017-05-19 13:59:35     35
2007-06-15 22:44:13     32
2016-11-04 07:20:36     30
2011-05-25 09:34:52     28
2016-03-11 03:09:28     26
2005-11-05 16:25:54     26
2007-09-19 04:38:12     25
2013-09-28 09:31:00     25
2008-05-21 19:52:33     24
2016-04-08 07:24:40     24
2015-01-31 11:11:54     23
2013-12-11 18:45:00     23
2013-09-19 12:59:00     22
2008-04-09 02:20:00     22
2007-07-18 17:58:02     22
                      ... 
2014-10-20 19:03:57      1
2017-02-22 23:42:49      1
2005-08-16 03:34:48      1
2017-09-07 23:36:39      1
2007-07-09 13:38:41      1
2006-09-27 07:48:46      1
2014-08-28 03:08:06      1
2015-03-30 08:12:55      1
2011-08-26 10:27:35      1
2012-12-21 14:06:16      1
2009-08-28 00:54:53      1
2014-09-08 21:39:52      1
2015-04-24 14:16:23      1
2015-05-07 14:21:27      1
2012-04-30 04:09:17      1
2007-12-19 13:03:19      1
2011-05-25 00:12:15      1
2015-07-15 03:01:48      1
2005-06-23 07:07:50      1
2009-04-18 15:19:26      1
2012-03-07 14:17:13      1
2015-10-27 09:27:47      1
2007-05-24 23:41:50      1
2007-12-06 14:26:38      1
2011-06-28 01:28:38      1
2011-07-20 04:54:07      1
2010-06-10 16:12:46      1
2008-05-27 08:18:46      1
2015-10-01 04:12:32      1
2015-10-02 12:58:32      1
Name: 0, Length: 692468, dtype: int64